# ========================================================================== #
# Replication code for: The Fall of Trump: Mobilization and Vote Switching   #
# in the 2020 Presidential Election                                          #
# Authors: Enrijeta Shino, Seth C. Mckee and Daniel A. Smith                 # 
# Journal:  Political Science Research and Methods                           #
# Year: 2023                                                                 #
# ========================================================================== #

# clean environment
rm(list = ls())

# load the libraries
library(data.table)
library(survey)
library(mfx)
library(descr)
library(questionr)
library(weights)
library(texreg)
library(xtable)
library(scales)
library(margins)
library(ggeffects)
library(sjmisc)
library(ggplot2)
library(latex2exp)
library(readstata13)
library(gdata)
library(RCurl)
library(tidyr)
library(ggridges)
library(viridis)
library(systemfonts)
library(hrbrthemes)
library(ggrepel)
library(BBmisc)
library(psych)
library(plotROC)
library(stats)
library(aod)

# set working directory
setwd("/Users/Enrijeta/Dropbox/VoteSwitchers/")


#=================#
# CES 2016 SURVEY #
#=================#

# Load CES 2016 dataset
cces16 <- read.dta13("/Users/Enrijeta/Dropbox/VoteSwitchers/CCES2016/CCES16_Common_OUTPUT_Feb2018_VV.dta")

# Recode party ID: 1 = strong Democrat ---> 7 = strong Republican. Missing values are dropped.
cces16$pid71 <- as.numeric(cces16$pid7)
cces16$pid71[cces16$pid71 == 8] <- NA
table(cces16$pid71)

# Validated vote 2016: 1 = validated voter; 0 = non validated voter.
cces16$vote16_validated <- ifelse(cces16$CL_E2016GVM == "absentee" | cces16$CL_E2016GVM == "earlyVote" |
                                  cces16$CL_E2016GVM == "mail" | cces16$CL_E2016GVM == "polling", 1, 0)
table(cces16$vote16_validated)

# Vote self-report 2016: 1 = self-reported voter; 0 = self-reported non-voter
cces16$voted16 <- ifelse(cces16$CC16_401 == "I definitely voted in the General Election.", 1, 0)
table(cces16$voted16)

# Combine self-reported and validated vote in 2016: 1 = self-reported and validated voter; 0 = otherwise
cces16$voter2016 <- ifelse(cces16$voted16 == 1 & cces16$vote16_validated== 1, 1, 0)
table(cces16$voter2016)


#=================#
# CES 2020 SURVEY #
#=================#

# Load CES 2020 dataset
cces20 <- read.dta13("/Users/Enrijeta/Dropbox/VoteSwitchers/CCES2020_data/CES20_Common_OUTPUT_vv.dta")


#=================#
## RECODE SCALES ##
#=================#

# Sociotropic economy: 1 = much better --> 5 = much worse over the last year. Missing values (6 = not sure) are dropped.
cces20$economy_socio <- cces20$CC20_302
cces20$economy_socio[cces20$economy_socio==6] <- NA
table(cces20$economy_socio)

# Rescale sociotropic economy to vary from 0 --> 1
cces20$economy_socio_rescaled <- scales::rescale(cces20$economy_socio, to = c(0,1))
table(cces20$economy_socio_rescaled)

# Pocketbook economy: 1 = increased a lot --> 5 = decreased a lot
cces20$economy_pocketbook <- cces20$CC20_303
table(cces20$economy_pocketbook)

# Rescale pocketbook economy to vary from 0 --> 1
cces20$economy_pocketbook_rescaled <- scales::rescale(cces20$economy_pocketbook, to = c(0, 1))
table(cces20$economy_pocketbook_rescaled)


#========================== #
# Health care scale items   #
# Coded as:                 #
# 1 = liberal position      #
# 0 = conservative position #
#========================== #

# 1) Expand Medicare to a single comprehensive public health care coverage program that would cover all Americans
# 1 = support, 0 = oppose
cces20$healthcare1 <- ifelse(cces20$CC20_327a == 1, 1, 0)
table(cces20$healthcare1)

# 2) Allow the government to negotiate with drug companies to get a lower price on prescription drugs that would apply to both Medicare and private insurance. Maximum negotiated price could not exceed 120% of the average prices in 6 other countries.
# 1 = support, 0 = oppose
cces20$healthcare2 <- ifelse(cces20$CC20_327b == 1, 1, 0)
table(cces20$healthcare2)

# 3) Lower the eligibility age for Medicare from 65 to 50.
# 1 = support, 0 = oppose
cces20$healthcare3 <- ifelse(cces20$CC20_327c == 1, 1, 0)
table(cces20$healthcare3)

# 4) Repeal the entire Affordable Care Act.
# 0 = support, 1 = oppose
cces20$healthcare4 <- ifelse(cces20$CC20_327d == 1, 0, 1)
table(cces20$healthcare4)

# Crombach alpha for health care scale items: alpha = 0.6; median r = 0.2 
cces20 <- data.table(cces20)
healthcare_alpha <- cces20[,.(healthcare1, healthcare2, healthcare3, healthcare4)]
psych::alpha(healthcare_alpha[,c(1:4)])

# Healthcare scale variable
cces20$healthcare_scale <- cces20$healthcare1 + cces20$healthcare2 + cces20$healthcare3 + cces20$healthcare4
table(cces20$healthcare_scale)

#=================================================#
# Rescale health care scale to vary from 0 --> 1  #
# Appendix A.2: Table 1                           #
#=================================================#
cces20$healthcare_rescaled <- scales::rescale(cces20$healthcare_scale, to = c(0, 1))
table(cces20$healthcare_rescaled)


#========================== #
# Immigration scale items   #
# Coded as:                 #
# 1 = liberal position      #
# 0 = conservative position #
#========================== #

# 1) Grant legal status to all illegal immigrants who have held jobs and paid taxes for at least 3 years, and not been convicted of any felony crimes.
# 1 = support; 0 = oppose 
cces20$immigration1 <- ifelse(cces20$CC20_331a == 1, 1, 0)
table(cces20$immigration1)

# 2) Increase the number of border patrols on the US-Mexican border.
# 1 = oppose, 0 = support
cces20$immigration2 <- ifelse(cces20$CC20_331b == 1, 0, 1)
table(cces20$immigration2)

# 3) Withhold federal funds from any local police department that does not report to the federal government anyone they identify as an illegal immigrant.
# 1 = oppose, 0 = support
cces20$immigration3 <- ifelse(cces20$CC20_331c == 1, 0, 1)
table(cces20$immigration3)

# 4) Reduce legal immigration by 50 percent over the next 10 years by eliminating the visa lottery and ending family-based migration.
# 1 = oppose, 0 = support
cces20$immigration4 <- ifelse(cces20$CC20_331d == 1, 0, 1)
table(cces20$immigration4)

# 5) Increase spending on border security by $25 billion, including building a wall between the U.S. and Mexico.
# 1 = oppose, 0 = support
cces20$immigration5 <- ifelse(cces20$CC20_331e == 1, 0, 1)
table(cces20$immigration5)

# Crombach alpha for immigration policy scale: alpha= 0.82; median r = 0.47
cces20 <- data.table(cces20)
immigration_alpha <- cces20[,.(immigration1, immigration2, immigration3, immigration4, immigration5)]
psych::alpha(immigration_alpha[,c(1:5)])

# Immigration scale
cces20$immigration_scale <- cces20$immigration1 + cces20$immigration2 + cces20$immigration3 + 
                            cces20$immigration4 + cces20$immigration5

table(cces20$immigration_scale)

#=================================================================#
# Rescale immigration policy position scale to vary from 0 --> 1  #
# Appendix A.3: Table 2                                           #
#=================================================================#

cces20$immigration_rescaled <- scales::rescale(cces20$immigration_scale, to = c(0,1))
table(cces20$immigration_rescaled)


#====================#
## RECODE VARIABLES ##
#====================#

# Voted in 2016: 0 = did not vote; 1 = voted
cces20$voted2016 <- ifelse(cces20$presvote16post == 7, 0, 1)
table(cces20$voted2016)

# Voted in 2020 election: 0 = did not vote; 1 = voted
cces20$voted2020 <- ifelse(cces20$CC20_401 == 5, 1, 0)
table(cces20$voted2020)

# Validated vote 2020: 0 = not validated voter; 1 = validated voter
cces20$vote20_validated <- ifelse(cces20$CL_2020gvm < 5, 1, 0)
table(cces20$vote20_validated)

# Voted in 2020, self-reported and validated measure: 1 = self-reported and validated voter; 0 = otherwise
cces20$voter2020 <- ifelse(cces20$voted2020 == 1 & cces20$vote20_validated== 1, 1, 0)
table(cces20$voter2020)


# New voters
crosstab(cces20$voted2020, cces20$voted2016, weight = cces20$commonweight, prop.c = T)


# Vote 2016: 0 = other candidate; 1 = voted for Trump; 2 = voted for Clinton
cces20$vote16_candidate <- with(cces20, ifelse(presvote16post >= 3 & presvote16post < 7, 0,
                                        ifelse(presvote16post == 2, 1, 
                                        ifelse(presvote16post == 1, 2, NA))))
table(cces20$vote16_candidate)


# Vote choice 2016: 1 = voted for Clinton, 0 = voted for Trump
cces20$votechoice16 <- with(cces20, ifelse(vote16_candidate == 2, 1,
                                    ifelse(vote16_candidate == 1, 0, NA)))

table(cces20$votechoice16)


# Vote intent in 2020 for those who voted in 2016: 0 = other candidate; 1 = support Trump; 2 = support Biden
cces20$vote_intent_voters16 <- with(cces20, ifelse(voted2016==1 & CC20_364b == 3, 0, 
                                            ifelse(voted2016==1 & CC20_364b == 1, 1, 
                                            ifelse(voted2016==1 & CC20_364b == 2, 2, NA))))

table(cces20$vote_intent_voters16)


# Vote intent in 2020 for abstainers in 2016: 1 = support Trump; 2 = support Biden
cces20$vote_intent_absteiners16 <- with(cces20, ifelse(voted2016==0 & CC20_364b == 1, 1, 
                                                ifelse(voted2016==0 & CC20_364b == 2, 2, NA)))
table(cces20$vote_intent_absteiners16)
prop.table(table(cces20$vote_intent_absteiners16))


# Vote intent in 2020: 0 = other candidate; 1 = support Trump; 2 = support Biden
cces20$vote_intent20 <- with(cces20, ifelse(CC20_364b == 3, 0, 
                                     ifelse(CC20_364b == 1, 1, 
                                     ifelse(CC20_364b == 2, 2, NA))))
table(cces20$vote_intent20)


# Vote intent in 2020: 0 = support Trump; 1 = support Biden
cces20$voteintent20_pre <- with(cces20, ifelse(CC20_364b == 1, 0, 
                                        ifelse(CC20_364b == 2, 1, NA)))
table(cces20$voteintent20_pre)


# Vote choice in 2020 (post-election survey): 0 = Trump, 1 = Biden
cces20$votechoice2020_post <- with(cces20, ifelse(CC20_410 == 1, 1, 
                                           ifelse(CC20_410 == 2, 0, NA)))

table(cces20$votechoice2020_post)


# Vote intent in 2020: 1 = support Biden; 0 = support Trump. Validated voter.
cces20$biden <- ifelse(cces20$vote_intent20 == 2 & cces20$vote20_validated==1, 1,
                ifelse(cces20$vote_intent20 == 1 & cces20$vote20_validated==1, 0, NA))

prop.table(table(cces20$biden))



#==================#
# Policy Issues    #
#==================#

# Contracted COVID-19: 0 = nobody, 1 = myself, a family member, a friend, a coworker 
cces20$contracted_covid <- with(cces20, ifelse(CC20_309a_1 == 1 | CC20_309a_2 == 1 | 
                                               CC20_309a_3 == 1 | CC20_309a_4 == 1 , 1,
                                        ifelse(CC20_309a_5 == 1, 0, NA)))
table(cces20$contracted_covid)


# Know somebody who died from COVID-19: 0 = nobody, 1 = a family member; a friend or a coworker 
cces20$died_covid <- with(cces20, ifelse(CC20_309b_1 == 1 | CC20_309b_2 == 1 | CC20_309b_3 == 1, 1,
                                  ifelse(CC20_309b_4 == 1, 0, NA)))
table(cces20$died_covid)


# Economic impact of COVID-19: 0 = no impact; 1 = impact (reduced hours, laid off, lost job)
cces20$covid_econ <- with(cces20, ifelse(CC20_309c_1 == 1 | CC20_309c_3 == 1 | 
                                         CC20_309c_5 == 1 | CC20_309c_6 == 1, 1, 
                                  ifelse(CC20_309c_2 == 1 | CC20_309c_4 == 1| CC20_309c_7 == 1|
                                         CC20_309c_8 == 1 | CC20_309c_9 == 1| CC20_309c_10 == 1, 0, NA)))

table(cces20$covid_econ)


# Feelings about police: 0 = feels safe; 1 = feels unsafe
cces20$police <- with(cces20, ifelse(CC20_307 == 1 | CC20_307 == 2, 0, 1))
table(cces20$police)


# Victim of a crime: 1 = yes; 0 = no
# cces20$crime_victim <- ifelse(cces20$CC20_305_9 == 1, 1, 0)
# table(cces20$crime_victim)


# Replace RBG: 0 = before elections; 1 = after elections
cces20$scotus_rbg <- ifelse(cces20$CC20_356a == 1, 0, 
                     ifelse(cces20$CC20_356a == 2, 1, NA))

table(cces20$scotus_rbg)


# Follow news about politics: 1 = hardly at all ---> 4 = most of the time. Missing values are dropped. 
cces20$newsint[cces20$newsint == 7] <- NA

cces20$political_awareness <- 5 - cces20$newsint
table(cces20$political_awareness)


# Party ID: 1 = no party affiliate (NPA); 2 = Democrat; 3 = Republican
cces20$party_id <- with(cces20, ifelse(pid3 == 3, 1, 
                                ifelse(pid3 == 1, 2, 
                                ifelse(pid3 == 2, 3, NA))))

table(cces20$party_id)
cces20$party_id <- factor(cces20$party_id)


# Party ID: 1 = strong Democrat ---> 7 = strong Republican. Remove missing data
cces20$pid7[cces20$pid7 == 8] <- NA
table(cces20$pid7)

# Ideology: 1 = very liberal ---> 5 = very conservative ("not sure" recoded as moderates)
cces20$ideo5[cces20$ideo5 == 6] <- 3
cces20$ideology <- cces20$ideo5
table(cces20$ideology)

# Education: 0 = high school or less; 1 = some college + college+
cces20$education <- with(cces20, ifelse(educ == 1 | educ == 2, 0, 1))
table(cces20$education)


# Race: 0 = white, 1 = black, 2 = Hispanic, 3 = race other 
cces20$race_cat <- with(cces20, ifelse(race == 1, 0,
                                ifelse(race == 2, 1,
                                ifelse(race == 3, 2, 3))))

table(cces20$race_cat)
cces20$race_cat <- factor(cces20$race_cat)

# Income: income continuous. Missing values dropped.
cces20$faminc_new[cces20$faminc_new == 97] <- NA

cces20$income <- cces20$faminc_new
table(cces20$income)

# Sex: 0 = male; 1 = female
cces20$female <- ifelse(cces20$gender == 2, 1, 0)
table(cces20$female)

# Respondent's age in 2020
cces20$age <- 2020 - cces20$birthyr
table(cces20$age)


# Young voters who couldn't vote in 2016
cces20$young_nonvoter16 <- ifelse(cces20$age < 22, 1, 0)
table(cces20$young_nonvoter16)

cces20 <- data.table(cces20)


#=============================#
# DV for CES: Trump Switchers #
#=============================#

# 1 = Trump voters in 2016 who voted for Biden in 2020; 0 = Trump standpatters
cces20$trump_switchers <- with(cces20, ifelse(vote16_candidate==1 & vote_intent_voters16 == 2 & vote20_validated==1, 1,
                                       ifelse(vote16_candidate==1 & vote_intent_voters16 == 1 & vote20_validated==1, 0, NA)))
table(cces20$trump_switchers)


#================================#
# DV for CES: Democrat Switchers #
#================================#

# 1 = Clinton voters in 2016 who voted for Trump in 2020; 0 = Democrat standpatters
cces20$dem_switchers <- with(cces20, ifelse(vote16_candidate==2 & vote_intent_voters16 == 1 & vote20_validated==1, 1,
                                     ifelse(vote16_candidate==2 & vote_intent_voters16 == 2 & vote20_validated==1, 0, NA)))
table(cces20$dem_switchers)


#================================#
# DV for CES: Third Party Voters #
#================================#

# 1 = Third party voter in 2016 who voted for Biden in 2020; 0 = Third party standpatters
cces20$other_dem_switchers <- with(cces20, ifelse(vote16_candidate==0 & vote_intent_voters16 == 2 & vote20_validated==1, 1, 
                                           ifelse(vote16_candidate==0 & vote_intent_voters16 == 0 & vote20_validated==1, 0, NA)))
table(cces20$other_dem_switchers)


# 1 = Third party voter in 2016 who voted for Trump in 2020; 0 = Third party standpatters
cces20$other_trump_switchers <- with(cces20, ifelse(vote16_candidate==0 & vote_intent_voters16 == 1 & vote20_validated==1, 1,
                                             ifelse(vote16_candidate==0 & vote_intent_voters16 == 0 & vote20_validated==1, 0, NA)))
table(cces20$other_trump_switchers)


#================================================================= #
# 2016 major party voters who switched to 2020 minor party voters  #
#================================================================= #

# 1 = Clinton voter in 2016 who voted for a third party candidate in 2020; 0 = standpatters
cces20$dem_minor_switchers <- with(cces20, ifelse(vote16_candidate==2 & vote_intent_voters16 == 0 & vote20_validated==1, 1,
                                           ifelse(vote16_candidate==2 & vote_intent_voters16 == 2 & vote20_validated==1, 0, NA)))
table(cces20$dem_minor_switchers)


# 1 = Trump voter in 2016 who voted for a third party candidate in 2020; 0 = standpatters
cces20$rep_minor_switchers <- with(cces20, ifelse(vote16_candidate==1 & vote_intent_voters16 == 0 & vote20_validated==1, 1,
                                           ifelse(vote16_candidate==1 & vote_intent_voters16 == 1 & vote20_validated==1, 0, NA)))
table(cces20$rep_minor_switchers)


# Correlation coefficient for party ID and supreme court question
cces20$party_id1 <- as.numeric(cces20$party_id)
# correlation between D two-party vote share and turnout
cor.test(cces20$party_id1, cces20$scotus_rbg, use = "complete.obs") # 0.34



# ========================================#
# Standpatters 2016 --> 2020              #
# 1 = D standpatters ; 0 = R standpatters #
# ========================================#
cces20$dem_standpatters <- with(cces20, ifelse(vote16_candidate==2 & vote_intent_voters16 == 2 & vote20_validated==1, 1,
                                        ifelse(vote16_candidate==1 & vote_intent_voters16 == 1 & vote20_validated==1, 0, NA)))
table(cces20$dem_standpatters)


#==============================================#
# DV: Abstainers 2016 to R or D voters in 2020 # 
#==============================================#

# 0 = abstainers who supported Trump; 1 = abstainers who supported Biden
cces20$biden_2020 <- with(cces20, ifelse(voted2016==0 & vote_intent_absteiners16 == 1 & vote20_validated ==1, 0, 
                                  ifelse(voted2016==0 & vote_intent_absteiners16 == 2 & vote20_validated ==1, 1, NA)))
table(cces20$biden_2020)


#==============================================#
# Table 1: Standpatters, Switchers, New voters # 
#==============================================#

# Validated voters
# 1 = New voters; 2 = Switchers to major party; 3 = Switchers (R-->D, D-->R); 4 = Standpatters
cces20$cummulative_var <- with(cces20, ifelse(voted2016==0 & voted2020==1 & vote20_validated == 1, 1, 
                                       ifelse(vote16_candidate==0 & vote_intent20!=0 & vote20_validated == 1, 2, 
                                       ifelse((vote16_candidate==1 & vote_intent20==2 & vote20_validated == 1)| 
                                              (vote16_candidate==2 & vote_intent20==1 & vote20_validated == 1), 3,
                                       ifelse(vote16_candidate==vote_intent20 & vote20_validated == 1, 4, NA)))))

table(cces20$cummulative_var)


#=========#
# Table 1 #
#=========#
round(prop.table(table(cces20$cummulative_var)),4)*100
crosstab(cces20$cummulative_var, cces20$pid7, weight = cces20$vvweight, prop.r = T, plot = F, digits = 2)

#=========#
# Table 2 # 
#=========#
crosstab(cces20$cummulative_var[cces20$biden==1], cces20$pid7[cces20$biden==1], prop.r = T, plot = F, digits = 2)


# Non validated voters
# 1 = new voters; 2 = Switchers to major party; 3 = switchers R-->D, D-->R; 4 = Standpatters
cces20$votertype_notvalidated <- with(cces20, ifelse(voted2016==0 & voted2020==1, 1, 
                                              ifelse(vote16_candidate==0 & vote_intent20!=0, 2, 
                                              ifelse((vote16_candidate==1 & vote_intent20==2)| 
                                                     (vote16_candidate==2 & vote_intent20==1), 3,
                                              ifelse(vote16_candidate==vote_intent20, 4, NA)))))

prop.table(table(cces20$votertype_notvalidated))

crosstab(cces20$vote20_validated, cces20$votertype_notvalidated, prop.c = T, plot = F)


# Trump approval: disapprove --> approve 
cces20$CC20_320a[cces20$CC20_320a==5] <- NA

cces20$trump_approval <- 5 - cces20$CC20_320a
table(cces20$trump_approval)


# Trump approval: 1 = disapprove, 0 = approve 
cces20$trump_approval_dummy <- ifelse(cces20$trump_approval < 3, 1, 0)
table(cces20$trump_approval_dummy)

# Trump approval by voter type
crosstab(cces20$cummulative_var, cces20$trump_approval, weight = cces20$vvweight, prop.r = T, plot = F)
crosstab(cces20$cummulative_var, cces20$trump_approval_dummy, weight = cces20$vvweight, prop.r = T, plot = F)



#=============#
# Biden Vote  #
#=============#
# New voters from two party-vote share: 1 = new voters who voted for Biden; 0 = otherwise
cces20$newvoters <- with(cces20, ifelse(voted2016==0 & voted2020==1 & vote20_validated == 1 , 1, 0))
table(cces20$newvoters)

crosstab(cces20$biden, cces20$newvoters, prop.c = T)

# Switchers to major party: 1 = new voters who voted for a major party; 0 = otherwise
cces20$switchers_majorparty <- with(cces20, ifelse(vote16_candidate==0 & vote_intent20!=0 & vote20_validated == 1, 1, 0))
table(cces20$switchers_majorparty)

crosstab(cces20$biden, cces20$switchers_majorparty, prop.c = T)


# Switchers R-->D, D-->R
cces20$switchers <- with(cces20, ifelse((vote16_candidate==1 & vote_intent20==2 & vote20_validated == 1)| 
                                        (vote16_candidate==2 & vote_intent20==1 & vote20_validated == 1), 1, 0))
table(cces20$switchers)

crosstab(cces20$biden, cces20$switchers, prop.c = T)

# Standpatters
cces20$standpatters <- with(cces20, ifelse(vote16_candidate==vote_intent20 & vote20_validated == 1, 1, 0))
table(cces20$standpatters)

crosstab(cces20$biden, cces20$standpatters, prop.c = T)


# PID crosstab cummulative_var
cross_test <- crosstab(cces20$cummulative_var, cces20$pid7, prop.r = T)


# PID crosstab cummulative_var
crosstab(cces20$cummulative_var, cces20$pid7, prop.r = T)


#==================================================#
# Table 3: Policy Issues and Switcher Descriptives # 
#==================================================#

# Contracted covid19: 0 = nobody, 1 = myself or family member, a friend or a coworker
# Estimates shown are for contracted_covid = 1 (myself or family member, a friend or a coworker)
prop.table(table(cces20$contracted_covid))*100
crosstab(cces20$cummulative_var, cces20$contracted_covid, weight = cces20$vvweight, 
         prop.r = T, digits = 2, plot = F)


# Police: 1 = not safe; 0 = safe
# Estimates shown are for police = 1 (not safe)
prop.table(table(cces20$police))*100
crosstab(cces20$cummulative_var, cces20$police, weight = cces20$vvweight, 
         prop.r = T, digits = 2, plot = F)


# RBG replacement: 1 = after election; 0 = pre-election
# Estimates shown are for scotus_rbg = 1 (after election)
prop.table(table(cces20$scotus_rbg))*100
crosstab(cces20$cummulative_var, cces20$scotus_rbg, weight = cces20$vvweight, 
         prop.r = T, digits = 2, plot = F)


# Mean values for pocketbook economy 
summary(cces20$economy_pocketbook_rescaled)
summary(cces20$economy_pocketbook_rescaled[cces20$cummulative_var==1])
summary(cces20$economy_pocketbook_rescaled[cces20$cummulative_var==2])
summary(cces20$economy_pocketbook_rescaled[cces20$cummulative_var==3])
summary(cces20$economy_pocketbook_rescaled[cces20$cummulative_var==4])


# Mean values for sociotropic economy 
summary(cces20$economy_socio_rescaled)
summary(cces20$economy_socio_rescaled[cces20$cummulative_var==1])
summary(cces20$economy_socio_rescaled[cces20$cummulative_var==2])
summary(cces20$economy_socio_rescaled[cces20$cummulative_var==3])
summary(cces20$economy_socio_rescaled[cces20$cummulative_var==4])


# Mean values for health care scale
summary(cces20$healthcare_rescaled)
summary(cces20$healthcare_rescaled[cces20$cummulative_var==1])
summary(cces20$healthcare_rescaled[cces20$cummulative_var==2])
summary(cces20$healthcare_rescaled[cces20$cummulative_var==3])
summary(cces20$healthcare_rescaled[cces20$cummulative_var==4])


# Mean values for immigration scale
summary(cces20$immigration_rescaled)
summary(cces20$immigration_rescaled[cces20$cummulative_var==1])
summary(cces20$immigration_rescaled[cces20$cummulative_var==2])
summary(cces20$immigration_rescaled[cces20$cummulative_var==3])
summary(cces20$immigration_rescaled[cces20$cummulative_var==4])


#=============================================#
# Table 4: New Voters Vote Choice (Biden = 1) #
#=============================================#

# Current issues only
newvoter_biden1 <- glm(biden_2020 ~ contracted_covid + police + scotus_rbg, 
                       data = cces20, family = binomial)
summary(newvoter_biden1)

# Huber-White robust standard errors, clustered by county
coeftest(newvoter_biden1, vcov=vcovHC(newvoter_biden1, type="HC0", cluster="inputstate"))



# Current issues with controls
newvoter_biden1c <- glm(biden_2020 ~ contracted_covid + police + scotus_rbg + political_awareness + 
                          party_id + ideology + age + young_nonvoter16 + female + 
                          race_cat + education + income, 
                        data = cces20, family = binomial)
summary(newvoter_biden1c)

# Huber-White robust standard errors, clustered by county
coeftest(newvoter_biden1c, vcov=vcovHC(newvoter_biden1c, type="HC0", cluster="inputstate"))


# Current and long term issues only
newvoter_biden2 <- glm(biden_2020 ~ contracted_covid + police + scotus_rbg + 
                         economy_pocketbook_rescaled + economy_socio_rescaled + healthcare_rescaled + 
                         immigration_rescaled, data = cces20, family = binomial)
summary(newvoter_biden2)

# Huber-White robust standard errors, clustered by county
coeftest(newvoter_biden2, vcov=vcovHC(newvoter_biden2, type="HC0", cluster="inputstate"))


# Current and long term issues only with controls
newvoter_biden2c <- glm(biden_2020 ~ contracted_covid + police + scotus_rbg + 
                          economy_pocketbook_rescaled + economy_socio_rescaled + healthcare_rescaled + 
                          immigration_rescaled +  political_awareness + party_id + ideology + 
                          age + young_nonvoter16 + female + race_cat + education + income, 
                        data = cces20, family = binomial)
summary(newvoter_biden2c)

# Huber-White robust standard errors, clustered by county
coeftest(newvoter_biden2c, vcov=vcovHC(newvoter_biden2c, type="HC0", cluster="inputstate"))

# ========= #
# Model fit #
# ========= #

# Obtain predicted probabilities
pred_prob_table3 <- predict(newvoter_biden2, newdata = cces20, type = "response")

# Add predicted value to our dataset
data_predicted1 <- data.frame(cces20$biden_2020, pred_prob_table3, "Non Voter 2016 --> Biden Voter 2020")


#==============#
# Model Output #
#==============#
# Note: add Huber-White robust standard errors, clustered by county

texreg(list(newvoter_biden1, newvoter_biden1c, newvoter_biden2, newvoter_biden2c),
       caption="Logistic Regression Models for New Voters Voting for Biden in the 2020 Presidential Election",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("newvoter_biden1", "newvoter_biden1c", "newvoter_biden2", "newvoter_biden2c"),
       override.se=list(summary(newvoter_biden1)$coef[,2],
                        summary(newvoter_biden1c)$coef[,2],
                        summary(newvoter_biden2)$coef[,2],
                        summary(newvoter_biden2c)$coef[,2],
                        override.pval=list(summary(newvoter_biden1)$coef[,4],
                                           summary(newvoter_biden1c)$coef[,4],
                                           summary(newvoter_biden2)$coef[,4],
                                           summary(newvoter_biden2c)$coef[,4])))


#===========#
# Wald test #
#===========#
wald.test(Sigma = vcov(newvoter_biden2c), b = coef(newvoter_biden2c), Terms = 1:3)
wald.test(Sigma = vcov(newvoter_biden2c), b = coef(newvoter_biden2c), Terms = 4:5)
wald.test(Sigma = vcov(newvoter_biden2c), b = coef(newvoter_biden2c), Terms = 6:7)
wald.test(Sigma = vcov(newvoter_biden2c), b = coef(newvoter_biden2c), Terms = 1:7)





#=======================================#
# Figure 1: Plot for New Voters Model 4 #
#=======================================#

plot_data_newvoters <- read.csv("/Users/Enrijeta/Dropbox/VoteSwitchers/psrm_replication/predicted_probabilities_STATA/plot_data_newvoters.csv")
plot_data_newvoters$party_id <- factor(plot_data_newvoters$party_id)

# ======================================================================================================= #

# Figure 1 #

# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.5) # move them .05 to the left and right

# Make them factors to preserve order in x-axis as desired. 
plot_data_newvoters$factor <- factor(plot_data_newvoters$factor, levels = c("scotus_rbg", "economy_socio_rescaled",
                                                                            "healthcare_rescaled", "immigration_rescaled"))

newvoters_plot <- ggplot(plot_data_newvoters, aes(x = factor, y = AME, colour = party_id)) + 
  
  # add the coefficients
  geom_point(aes(shape = party_id), position = pd, size=2.5) + 
  
  # remove the automatically generated legend
  guides(shape=FALSE) + 
  
  # add a ribbon with the confidence band 
  geom_errorbar(
    aes(
      # lower and upper bound of the ribbon
      ymin = lower, ymax = upper,
      colour = party_id, width = 0.0
    ), size=1, position = pd) + theme_light() + 
  
  # add the horizontal line
  geom_hline(yintercept = 0.0, linetype="dashed", 
             color = "black", size = 0.8) +
  
  # break y-axis every 0.25 and show it in percentages 
  scale_y_continuous(breaks=seq(-0.10, 0.35, by = 0.05), limits=c(-0.10, 0.35)) + 
  #, labels = function(x) paste0(x*100, "%"), limits=c(0, 0.5)) + 
  xlab("") +
  ylab("Change in Probability New Biden Voter") + 
  scale_x_discrete(breaks = c("scotus_rbg", "economy_socio_rescaled", "healthcare_rescaled", "immigration_rescaled"), 
                   labels = c("RBG Replacement", "Sociotropic Economy", "Healthcare", "Immigration")) +
  
  # add the shapes in order: square, circle, and triangle 
  scale_shape_manual(values = c(15, 16, 17)) + 
  
  # set the shapes of points in legend 
  guides(colour = guide_legend(override.aes = list(shape = c(15, 16, 17)))) +
  
  # different line colors 
  scale_color_manual(name = "",
                     values = c("3" = "red", "2" = "blue", "1" = "purple"), 
                     breaks = c("1", "2", "3"), 
                     labels = c("Independent", "Democrat", "Republican")) + 
  
  # remove background, set legend position bottom
  theme_bw() + theme(text = element_text(size = 12), legend.position = "bottom") 


newvoters_plot

# save graph in overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote_Switchers/plots/newvoters_plot1.pdf", width = 6.5, height = 6)



#===============================================#
# Table 5: Switchers to major parties: O ---> R #
#===============================================#

# Current issues only
other_trump_sw_cces1 <- glm(other_trump_switchers ~ contracted_covid + police + scotus_rbg,
                            data = cces20, family = binomial)
summary(other_trump_sw_cces1)

# Huber-White robust standard errors, clustered by county
coeftest(other_trump_sw_cces1, vcov=vcovHC(other_trump_sw_cces1, type="HC0", cluster="inputstate"))


# Current issues with controls
other_trump_sw_cces1c <- glm(other_trump_switchers ~ contracted_covid + police + scotus_rbg +
                               political_awareness  + party_id + ideology + age + female + race_cat + 
                               education + income, data = cces20, family = binomial)
summary(other_trump_sw_cces1c)

# Huber-White robust standard errors, clustered by county
coeftest(other_trump_sw_cces1c, vcov=vcovHC(other_trump_sw_cces1c, type="HC0", cluster="inputstate"))


# Current and long term issues only
other_trump_sw_cces2 <- glm(other_trump_switchers ~ contracted_covid +  police + scotus_rbg + 
                              economy_pocketbook_rescaled + economy_socio_rescaled +  
                              healthcare_rescaled + immigration_rescaled, data = cces20, family = binomial)
summary(other_trump_sw_cces2)

# Huber-White robust standard errors, clustered by county
coeftest(other_trump_sw_cces2, vcov=vcovHC(other_trump_sw_cces2, type="HC0", cluster="inputstate"))


# Current and long term issues only with controls
other_trump_sw_cces2c <- glm(other_trump_switchers ~ contracted_covid + police + scotus_rbg + 
                               economy_pocketbook_rescaled + economy_socio_rescaled +  
                               healthcare_rescaled + immigration_rescaled + political_awareness + 
                               party_id + ideology + age +  female + race_cat + education + income, 
                             data = cces20, family = binomial)
summary(other_trump_sw_cces2c)

# Huber-White robust standard errors, clustered by county
coeftest(other_trump_sw_cces2c, vcov=vcovHC(other_trump_sw_cces2c, type="HC0", cluster="inputstate"))


#=============================================#
# Table: Switchers to major parties: O ---> D #
#=============================================#

# Current issues only
other_biden_sw_cces1 <- glm(other_dem_switchers ~ contracted_covid + police + scotus_rbg,
                            data = cces20, family = binomial)
summary(other_biden_sw_cces1)

# Huber-White robust standard errors, clustered by county
coeftest(other_biden_sw_cces1, vcov=vcovHC(other_biden_sw_cces1, type="HC0", cluster="inputstate"))


# Current issues with controls
other_biden_sw_cces1c <- glm(other_dem_switchers ~ contracted_covid + police + scotus_rbg +
                               political_awareness  + party_id + ideology + age + female + race_cat + 
                               education + income, data = cces20, family = binomial)
summary(other_biden_sw_cces1c)

# Huber-White robust standard errors, clustered by county
coeftest(other_biden_sw_cces1c, vcov=vcovHC(other_biden_sw_cces1c, type="HC0", cluster="inputstate"))


# Current and long term issues only
other_biden_sw_cces2 <- glm(other_dem_switchers ~ contracted_covid +  police + scotus_rbg + 
                              economy_pocketbook_rescaled + economy_socio_rescaled + 
                              healthcare_rescaled + immigration_rescaled, data = cces20, family = binomial)
summary(other_biden_sw_cces2)

# Huber-White robust standard errors, clustered by county
coeftest(other_biden_sw_cces2, vcov=vcovHC(other_biden_sw_cces2, type="HC0", cluster="inputstate"))


# Current and long term issues only with controls
other_biden_sw_cces2c <- glm(other_dem_switchers ~ contracted_covid + police + scotus_rbg + 
                               economy_pocketbook_rescaled + economy_socio_rescaled +  
                               healthcare_rescaled + immigration_rescaled + 
                               political_awareness  + party_id + ideology + age + 
                               female + race_cat + education + income, data = cces20, family = binomial)
summary(other_biden_sw_cces2c)


# Huber-White robust standard errors, clustered by county
coeftest(other_biden_sw_cces2c, vcov=vcovHC(other_biden_sw_cces2c, type="HC0", cluster="inputstate"))


#==============#
# Model Output #
#==============#
# Note: add Huber-White robust standard errors, clustered by county

texreg(list(other_trump_sw_cces1, other_trump_sw_cces1c, other_trump_sw_cces2, other_trump_sw_cces2c,
            other_biden_sw_cces1, other_biden_sw_cces1c, other_biden_sw_cces2, other_biden_sw_cces2c),
       caption="Logistic Regression Models for Switchers to Major Party Voting for Biden (Trump) in the 2020 Presidential Election",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("other_trump_sw_cces1", "other_trump_sw_cces1c", "other_trump_sw_cces2", "other_trump_sw_cces2c",
                     "other_biden_sw_cces1", "other_biden_sw_cces1c", "other_biden_sw_cces2", "other_biden_sw_cces2c"),
       override.se=list(summary(other_trump_sw_cces1)$coef[,2],
                        summary(other_trump_sw_cces1c)$coef[,2],
                        summary(other_trump_sw_cces2)$coef[,2],
                        summary(other_trump_sw_cces2c)$coef[,2],
                        summary(other_biden_sw_cces1)$coef[,2],
                        summary(other_biden_sw_cces1c)$coef[,2],
                        summary(other_biden_sw_cces2)$coef[,2],
                        summary(other_biden_sw_cces2c)$coef[,2],
                        override.pval=list(summary(other_trump_sw_cces1)$coef[,4],
                                           summary(other_trump_sw_cces1c)$coef[,4],
                                           summary(other_trump_sw_cces2)$coef[,4],
                                           summary(other_trump_sw_cces2c)$coef[,4],
                                           summary(other_biden_sw_cces1)$coef[,4],
                                           summary(other_biden_sw_cces1c)$coef[,4],
                                           summary(other_biden_sw_cces2)$coef[,4],
                                           summary(other_biden_sw_cces2c)$coef[,4])))



#===========#
# Wald test #
#===========#
wald.test(Sigma = vcov(other_trump_sw_cces2c), b = coef(other_trump_sw_cces2c), Terms = 1:3)
wald.test(Sigma = vcov(other_trump_sw_cces2c), b = coef(other_trump_sw_cces2c), Terms = 4:5)
wald.test(Sigma = vcov(other_trump_sw_cces2c), b = coef(other_trump_sw_cces2c), Terms = 6:7)
wald.test(Sigma = vcov(other_trump_sw_cces2c), b = coef(other_trump_sw_cces2c), Terms = 1:7)



#===========#
# Wald test #
#===========#
wald.test(Sigma = vcov(other_biden_sw_cces2c), b = coef(other_biden_sw_cces2c), Terms = 1:3)
wald.test(Sigma = vcov(other_biden_sw_cces2c), b = coef(other_biden_sw_cces2c), Terms = 4:5)
wald.test(Sigma = vcov(other_biden_sw_cces2c), b = coef(other_biden_sw_cces2c), Terms = 6:7)
wald.test(Sigma = vcov(other_biden_sw_cces2c), b = coef(other_biden_sw_cces2c), Terms = 1:7)



#=====================================================================================#
# Figure 2: Predicted probabilities plot for Switcher to Major Party O --> R, Model 4 #
#=====================================================================================#

plot_data_other_r <- read.csv("/Users/Enrijeta/Dropbox/VoteSwitchers/psrm_replication/predicted_probabilities_STATA/plot_data_other_r.csv")
plot_data_other_r$party_id <- factor(plot_data_other_r$party_id)

# ======================================================================================================= #

# Figure 2(a)

# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.5) # move them .05 to the left and right

# Make them factors to preserve order in x-axis as desired. 
plot_data_other_r$factor <- factor(plot_data_other_r$factor, levels = c("scotus_rbg", "economy_socio_rescaled", 
                                                                        "healthcare_rescaled", "immigration_rescaled"))

other_r_plot <- ggplot(plot_data_other_r, aes(x = factor, y = AME, colour = party_id)) + 
  
  # add the coefficients
  geom_point(aes(shape = party_id), position = pd, size=2.5) + 
  
  # remove the automatically generated legend
  guides(shape=FALSE) + 
  
  # add a ribbon with the confidence band 
  geom_errorbar(
    aes(
      # lower and upper bound of the ribbon
      ymin = lower, ymax = upper,
      colour = party_id, width = 0.0
    ), size=1, position = pd) + theme_light() + 
  
  # add the horizontal line
  geom_hline(yintercept = 0.0, linetype="dashed", 
             color = "black", size = 0.8) +
  
  # break y-axis every 0.25 and show it in percentages 
  scale_y_continuous(breaks=seq(-0.5, 0.7, by = 0.1), limits=c(-0.5, 0.7)) + 
  #, labels = function(x) paste0(x*100, "%"), limits=c(0, 0.5)) + 
  xlab("") +
  ylab("Change in Probability Switcher") + 
  scale_x_discrete(breaks = c("scotus_rbg", "economy_socio_rescaled", "healthcare_rescaled", "immigration_rescaled"), 
                   labels = c("RBG Replacement", "Sociotropic Economy", "Healthcare", "Immigration")) +
  
  # add the shapes in order: square, circle, and triangle 
  scale_shape_manual(values = c(15, 16, 17)) + 
  
  # set the shapes of points in legend 
  guides(colour = guide_legend(override.aes = list(shape = c(15, 16, 17)))) +
  
  # different line colors 
  scale_color_manual(name = "",
                     values = c("3" = "red", "2" = "blue", "1" = "purple"), 
                     breaks = c("1", "2", "3"), 
                     labels = c("Independent", "Democrat", "Republican")) + 
  
  # remove background, set legend position bottom
  theme_bw() + theme(text = element_text(size = 12), legend.position = "bottom") 


other_r_plot

# save graph in overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote_Switchers/plots/switcherOR_plot1.pdf", width = 6.5, height = 6)


#====================================================#
# Figure 2: Switcher to Major Party O --> D, Model 4 #
#====================================================#

plot_data_other_d <- read.csv("/Users/Enrijeta/Dropbox/VoteSwitchers/psrm_replication/predicted_probabilities_STATA/plot_data_other_d.csv")
plot_data_other_d$party_id <- factor(plot_data_other_d$party_id)

# ======================================================================================================= #

# Figure 2(b)

# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.5) # move them .05 to the left and right

# Make them factors to preserve order in x-axis as desired. 
plot_data_other_d$factor <- factor(plot_data_other_d$factor, levels = c("scotus_rbg", "economy_socio_rescaled", 
                                                                        "healthcare_rescaled", "immigration_rescaled"))

other_d_plot <- ggplot(plot_data_other_d, aes(x = factor, y = AME, colour = party_id)) + 
  
  # add the coefficients
  geom_point(aes(shape = party_id), position = pd, size=2.5) + 
  
  # remove the automatically generated legend
  guides(shape=FALSE) + 
  
  # add a ribbon with the confidence band 
  geom_errorbar(
    aes(
      # lower and upper bound of the ribbon
      ymin = lower, ymax = upper,
      colour = party_id, width = 0.0
    ), size=1, position = pd) + theme_light() + 
  
  # add the horizontal line
  geom_hline(yintercept = 0.0, linetype="dashed", 
             color = "black", size = 0.8) +
  
  # break y-axis every 0.25 and show it in percentages 
  scale_y_continuous(breaks=seq(-0.5, 0.7, by = 0.1), limits=c(-0.5, 0.7)) + 
  #, labels = function(x) paste0(x*100, "%"), limits=c(0, 0.5)) + 
  xlab("") +
  ylab("Change in Probability Switcher") + 
  scale_x_discrete(breaks = c("scotus_rbg", "economy_socio_rescaled", "healthcare_rescaled", "immigration_rescaled"), 
                   labels = c("RBG Replacement", "Sociotropic Economy", "Healthcare", "Immigration")) +
  
  # add the shapes in order: square, circle, and triangle 
  scale_shape_manual(values = c(15, 16, 17)) + 
  
  # set the shapes of points in legend 
  guides(colour = guide_legend(override.aes = list(shape = c(15, 16, 17)))) +
  
  # different line colors 
  scale_color_manual(name = "",
                     values = c("3" = "red", "2" = "blue", "1" = "purple"), 
                     breaks = c("1", "2", "3"), 
                     labels = c("Independent", "Democrat", "Republican")) + 
  
  # remove background, set legend position bottom
  theme_bw() + theme(text = element_text(size = 12), legend.position = "bottom") 


other_d_plot

# save graph in overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote_Switchers/plots/switcherOD_plot1.pdf", width = 6.5, height = 6)




#============================#
# Table 6: Switchers R---> D #
#============================#

# Current issues only
trump_sw_cces1 <- glm(trump_switchers ~ contracted_covid + police + scotus_rbg, 
                      data = cces20, family = binomial)
summary(trump_sw_cces1)

# Huber-White robust standard errors, clustered by county
coeftest(trump_sw_cces1, vcov=vcovHC(trump_sw_cces1, type="HC0", cluster="inputstate"))


# Current issues with controls
trump_sw_cces1c <- glm(trump_switchers ~ contracted_covid + police + scotus_rbg + 
                         political_awareness + party_id + ideology + age + 
                         female + race_cat + education + income, data = cces20, family = binomial)
summary(trump_sw_cces1c)

# Huber-White robust standard errors, clustered by county
coeftest(trump_sw_cces1c, vcov=vcovHC(trump_sw_cces1c, type="HC0", cluster="inputstate"))



# Current and long term issues only
trump_sw_cces2 <- glm(trump_switchers ~ contracted_covid + police + scotus_rbg + 
                        economy_pocketbook_rescaled + economy_socio_rescaled + 
                        healthcare_rescaled + immigration_rescaled, data = cces20, family = binomial)
summary(trump_sw_cces2)

# Huber-White robust standard errors, clustered by county
coeftest(trump_sw_cces2, vcov=vcovHC(trump_sw_cces2, type="HC0", cluster="inputstate"))



# Current and long term issues only with controls
trump_sw_cces2c <- glm(trump_switchers ~ contracted_covid + police + scotus_rbg + economy_pocketbook_rescaled + 
                         economy_socio_rescaled + healthcare_rescaled + immigration_rescaled + 
                         political_awareness + party_id + ideology + age + female + race_cat + education + 
                         income, data = cces20, family = binomial)
summary(trump_sw_cces2c)

# Huber-White robust standard errors, clustered by county
coeftest(trump_sw_cces2c, vcov=vcovHC(trump_sw_cces2c, type="HC0", cluster="inputstate"))



#===============================#
# Correlation matrix for issues #
#===============================#
library("Hmisc")
cces20 <- data.table(cces20)
cor_dta <- cces20[, c("contracted_covid", "police", "scotus_rbg", "economy_pocketbook_rescaled", 
                      "economy_socio_rescaled", "healthcare_rescaled", "immigration_rescaled", "pid3")]

# There's some multicollinearity issues with scotus_rbg
res2 <- rcorr(as.matrix(cor_dta))
res2


#===========#
# Wald test #
#===========#
wald.test(Sigma = vcov(trump_sw_cces2c), b = coef(trump_sw_cces2c), Terms = 1:3)
wald.test(Sigma = vcov(trump_sw_cces2c), b = coef(trump_sw_cces2c), Terms = 4:5)
wald.test(Sigma = vcov(trump_sw_cces2c), b = coef(trump_sw_cces2c), Terms = 6:7)
wald.test(Sigma = vcov(trump_sw_cces2c), b = coef(trump_sw_cces2c), Terms = 1:7)



#============================#
# Table 6: Switchers D---> R #
#============================#

# model (5): Current issues only
biden_sw_cces1 <- glm(dem_switchers ~ contracted_covid + police + scotus_rbg, 
                      data = cces20, family = binomial)
summary(biden_sw_cces1)

# Huber-White robust standard errors, clustered by county
coeftest(biden_sw_cces1, vcov=vcovHC(biden_sw_cces1, type="HC0", cluster="inputstate"))


# model (6): Current issues with controls
biden_sw_cces1c <- glm(dem_switchers ~ contracted_covid + police + scotus_rbg + 
                         political_awareness + party_id + ideology + age + 
                         female + race_cat + education + income, data = cces20, family = binomial)
summary(biden_sw_cces1c)

# Huber-White robust standard errors, clustered by county
coeftest(biden_sw_cces1c, vcov=vcovHC(biden_sw_cces1c, type="HC0", cluster="inputstate"))



# model (7): Current and long term issues only
biden_sw_cces2 <- glm(dem_switchers ~ contracted_covid + police + scotus_rbg + 
                        economy_pocketbook_rescaled + economy_socio_rescaled + 
                        healthcare_rescaled + immigration_rescaled, data = cces20, family = binomial)
summary(biden_sw_cces2)

# Huber-White robust standard errors, clustered by county
coeftest(biden_sw_cces2, vcov=vcovHC(biden_sw_cces2, type="HC0", cluster="inputstate"))



# model (8): Current and long term issues only with controls
biden_sw_cces2c <- glm(dem_switchers ~ contracted_covid + police + scotus_rbg + 
                         economy_pocketbook_rescaled + economy_socio_rescaled + 
                         healthcare_rescaled + immigration_rescaled + political_awareness + 
                         party_id + ideology + age + female + race_cat + education + income, 
                       data = cces20, family = binomial)
summary(biden_sw_cces2c)

# Huber-White robust standard errors, clustered by county
coeftest(biden_sw_cces2c, vcov=vcovHC(biden_sw_cces2c, type="HC0", cluster="inputstate"))




#==============#
# Model Output #
#==============#
# Note: add Huber-White robust standard errors, clustered by county

texreg(list(trump_sw_cces1, trump_sw_cces1c, trump_sw_cces2, trump_sw_cces2c, 
            biden_sw_cces1, biden_sw_cces1c, biden_sw_cces2, biden_sw_cces2c),
       caption="Logistic Regression Models for Switchers Voting for Biden (Trump) in the 2020 Presidential Election",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("trump_sw_cces1", "trump_sw_cces1c", "trump_sw_cces2", "trump_sw_cces2c",
                     "biden_sw_cces1", "biden_sw_cces1c", "biden_sw_cces2", "biden_sw_cces2c"),
       override.se=list(summary(trump_sw_cces1)$coef[,2],
                        summary(trump_sw_cces1c)$coef[,2],
                        summary(trump_sw_cces2)$coef[,2],
                        summary(trump_sw_cces2c)$coef[,2],
                        summary(biden_sw_cces1)$coef[,2],
                        summary(biden_sw_cces1c)$coef[,2],
                        summary(biden_sw_cces2)$coef[,2],
                        summary(biden_sw_cces2c)$coef[,2],
                        override.pval=list(summary(trump_sw_cces1)$coef[,4],
                                           summary(trump_sw_cces1c)$coef[,4],
                                           summary(trump_sw_cces2)$coef[,4],
                                           summary(trump_sw_cces2c)$coef[,4],
                                           summary(biden_sw_cces1)$coef[,4],
                                           summary(biden_sw_cces1c)$coef[,4],
                                           summary(biden_sw_cces2)$coef[,4],
                                           summary(biden_sw_cces2c)$coef[,4])))


#============#
# Wald test  #
#============#
wald.test(Sigma = vcov(biden_sw_cces2c), b = coef(biden_sw_cces2c), Terms = 1:3)
wald.test(Sigma = vcov(biden_sw_cces2c), b = coef(biden_sw_cces2c), Terms = 4:5)
wald.test(Sigma = vcov(biden_sw_cces2c), b = coef(biden_sw_cces2c), Terms = 6:7)
wald.test(Sigma = vcov(biden_sw_cces2c), b = coef(biden_sw_cces2c), Terms = 1:7)



##=========================================================================================##
# Export variables needed to estimate predicted probabilities with observed values in STATA #
##=========================================================================================##

predprob_dta <- cces20[, c("trump_switchers", "dem_switchers", "other_trump_switchers", 
                           "other_dem_switchers", "biden_2020", "contracted_covid", "police", 
                           "scotus_rbg", "economy_pocketbook_rescaled", "economy_socio_rescaled", 
                           "healthcare_rescaled", "immigration_rescaled", "political_awareness", 
                           "party_id", "ideology", "age", "female", "race_cat", "education", "income")]

predprob_dta <- write.csv(predprob_dta, file = "/Users/Enrijeta/Dropbox/VoteSwitchers/ObservedValues_predprob/ces2020.csv")

# Run the STATA code for predicted probabilities with observed values then run the plots


#=================================================================#
#=================================================================#
# See Stata code for predicted probabilities with observed values #
#=================================================================#
#=================================================================#


#===========================================================#
# Figure 3: Import Stata output for predicted probabilities #  
#===========================================================#

plot_data_rd <- read.csv("/Users/Enrijeta/Dropbox/VoteSwitchers/psrm_replication/predicted_probabilities_STATA/plot_data_rd.csv")
plot_data_rd$party_id <- factor(plot_data_rd$party_id)

# ======================================================================================================= #

# Figure 3(a)

# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.5) # move them .05 to the left and right

# Make them factors to preserve order in x-axis as desired. 
plot_data_rd$factor <- factor(plot_data_rd$factor, levels = c("scotus_rbg", "economy_socio_rescaled", 
                                                              "healthcare_rescaled", "immigration_rescaled"))

swichers_rd_plot <- ggplot(plot_data_rd, aes(x = factor, y = AME, colour = party_id)) + 
  
  # add the coefficients
  geom_point(aes(shape = party_id), position = pd, size=2.5) + 
  
  # remove the automatically generated legend
  guides(shape=FALSE) + 
  
  # add a ribbon with the confidence band 
  geom_errorbar(
    aes(
      # lower and upper bound of the ribbon
      ymin = lower, ymax = upper,
      colour = party_id, width = 0.0
    ), size=1, position = pd) + theme_light() + 
  
  # add the horizontal line
  geom_hline(yintercept = 0.0, linetype="dashed", 
             color = "black", size = 0.8) +
  
  # break y-axis every 0.25 and show it in percentages 
  scale_y_continuous(breaks=seq(-0.2, 0.2, by = 0.05), limits=c(-0.2, 0.2)) + 
  #, labels = function(x) paste0(x*100, "%"), limits=c(0, 0.5)) + 
  xlab("") +
  ylab("Change in Probability Switcher (R-->D)") + 
  scale_x_discrete(breaks = c("scotus_rbg", "economy_socio_rescaled", "healthcare_rescaled", "immigration_rescaled"), 
                   labels = c("RBG Replacement", "Sociotropic Economy", "Healthcare", "Immigration")) +
  
  # add the shapes in order: square, circle, and triangle 
  scale_shape_manual(values = c(15, 16, 17)) + 
  
  # set the shapes of points in legend 
  guides(colour = guide_legend(override.aes = list(shape = c(15, 16, 17)))) +
  
  # different line colors 
  scale_color_manual(name = "",
                     values = c("3" = "red", "2" = "blue", "1" = "purple"), 
                     breaks = c("1", "2", "3"), 
                     labels = c("Independent", "Democrat", "Republican")) + 
  
  # remove background, set legend position bottom
  theme_bw() + theme(text = element_text(size = 12), legend.position = "bottom") 


swichers_rd_plot

# save graph in overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote_Switchers/plots/switcherRD_plot1.pdf", width = 6.5, height = 6)



#====================================================#
# Figure 3: Switcher to Major Party D --> R, Model 4 #
#====================================================#

plot_data_dr <- read.csv("/Users/Enrijeta/Dropbox/VoteSwitchers/psrm_replication/predicted_probabilities_STATA/plot_data_dr.csv")
plot_data_dr$party_id <- factor(plot_data_dr$party_id)
# ======================================================================================================= #

# Figure 3(b)

# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.5) # move them .05 to the left and right

# Make them factors to preserve order in x-axis as desired. 
plot_data_dr$factor <- factor(plot_data_dr$factor, levels = c("scotus_rbg", "economy_socio_rescaled", 
                                                              "healthcare_rescaled", "immigration_rescaled"))

swichers_dr_plot <- ggplot(plot_data_dr, aes(x = factor, y = AME, colour = party_id)) + 
  
  # add the coefficients
  geom_point(aes(shape = party_id), position = pd, size=2.5) + 
  
  # remove the automatically generated legend
  guides(shape=FALSE) + 
  
  # add a ribbon with the confidence band 
  geom_errorbar(
    aes(
      # lower and upper bound of the ribbon
      ymin = lower, ymax = upper,
      colour = party_id, width = 0.0
    ), size=1, position = pd) + theme_light() + 
  
  # add the horizontal line
  geom_hline(yintercept = 0.0, linetype="dashed", 
             color = "black", size = 0.8) +
  
  # break y-axis every 0.25 and show it in percentages 
  scale_y_continuous(breaks=seq(-0.2, 0.2, by = 0.05), limits=c(-0.2, 0.2)) + 
  #, labels = function(x) paste0(x*100, "%"), limits=c(0, 0.5)) + 
  xlab("") +
  ylab("Change in Probability Switcher (D-->R)") + 
  scale_x_discrete(breaks = c("scotus_rbg", "economy_socio_rescaled", "healthcare_rescaled" ,"immigration_rescaled"), 
                   labels = c("RBG Replacement", "Sociotropic Economy", "Healthcare", "Immigration")) +
  
  # add the shapes in order: square, circle, and triangle 
  scale_shape_manual(values = c(15, 16, 17)) + 
  
  # set the shapes of points in legend 
  guides(colour = guide_legend(override.aes = list(shape = c(15, 16, 17)))) +
  
  # different line colors 
  scale_color_manual(name = "",
                     values = c("3" = "red", "2" = "blue", "1" = "purple"), 
                     breaks = c("1", "2", "3"), 
                     labels = c("Independent", "Democrat", "Republican")) + 
  
  # remove background, set legend position bottom
  theme_bw() + theme(text = element_text(size = 12), legend.position = "bottom") 


swichers_dr_plot

# save graph in overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote_Switchers/plots/switcherDR_plot1.pdf", width = 6.5, height = 6)






#===========================================#
#===========================================#
# APPENDIX: Variable Descriptive Statistics #
#===========================================#
#===========================================#

# Appendix B: Table 3
summary(cces20$biden_2020)
sd(cces20$biden_2020, na.rm = T)

summary(cces20$other_trump_switchers)
sd(cces20$other_trump_switchers, na.rm = T)

summary(cces20$other_dem_switchers)
sd(cces20$other_dem_switchers, na.rm = T)

summary(cces20$trump_switchers)
sd(cces20$trump_switchers, na.rm = T)

summary(cces20$dem_switchers)
sd(cces20$dem_switchers, na.rm = T)



# Main Issues 
summary(cces20$contracted_covid)
sd(cces20$contracted_covid, na.rm = T)

summary(cces20$police)
sd(cces20$police, na.rm = T)

summary(cces20$scotus_rbg)
sd(cces20$scotus_rbg, na.rm = T)

summary(cces20$economy_pocketbook_rescaled)
sd(cces20$economy_pocketbook_rescaled, na.rm = T)

summary(cces20$economy_socio_rescaled)
sd(cces20$economy_socio_rescaled, na.rm = T)

summary(cces20$healthcare_rescaled)
sd(cces20$healthcare_rescaled, na.rm = T)

summary(cces20$immigration_rescaled)
sd(cces20$immigration_rescaled, na.rm = T)


# Controls
summary(cces20$political_awareness)
sd(cces20$political_awareness, na.rm = T)

summary(as.numeric(cces20$party_id))
sd(as.numeric(cces20$party_id), na.rm = T)

summary(cces20$ideology)
sd(cces20$ideology, na.rm = T)

summary(cces20$age)
sd(cces20$age, na.rm = T)

summary(cces20$female)
sd(cces20$female, na.rm = T)

summary(as.numeric(cces20$race_cat))
sd(as.numeric(cces20$race_cat), na.rm = T)

summary(cces20$education)
sd(cces20$education, na.rm = T)

summary(cces20$income)
sd(cces20$income, na.rm = T)

summary(cces20$young_nonvoter16)
sd(cces20$young_nonvoter16, na.rm = T)


##==============================================================#
# Appendix D: ROC comparing demographics only vs. full model 4  #
##==============================================================#


# New Voters Vote Choice (Biden = 1) #

# Current and long term issues only with controls
newvoter_biden2d <- glm(biden_2020 ~ political_awareness +  party_id + ideology + age + 
                          young_nonvoter16 + female + race_cat + education + income + inputstate, 
                        data = cces20, family = binomial)
summary(newvoter_biden2d)

# Obtain predicted probabilities 
pred_prob_table4_0 <- predict(newvoter_biden2d, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted0 <- data.frame(cces20$biden_2020, pred_prob_table4_0, "New voters 2020 --> Biden Voter 2020 (demographics only)")




# Current and long term issues only with controls
newvoter_biden2c <- glm(biden_2020 ~ contracted_covid + police + scotus_rbg + 
                          economy_pocketbook_rescaled + economy_socio_rescaled + healthcare_rescaled + 
                          immigration_rescaled +  political_awareness + 
                          party_id + ideology + age + young_nonvoter16 + female + 
                          race_cat + education + income, data = cces20, family = binomial)
summary(newvoter_biden2c)


# Obtain predicted probabilities 
pred_prob_table4_00 <- predict(newvoter_biden2c, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted00 <- data.frame(cces20$biden_2020, pred_prob_table4_00, "New voters 2020 --> Biden Voter 2020 (full model)")



## Trump switchers ##

# model demogrpahics only
trump_sw_cces2d <- glm(trump_switchers ~ political_awareness + party_id + ideology + age + 
                         female + race_cat + education + income, data = cces20, family = binomial)
summary(trump_sw_cces2d)

# Obtain predicted probabilities 
pred_prob_table4_1 <- predict(trump_sw_cces2d, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted1 <- data.frame(cces20$trump_switchers, pred_prob_table4_1, "Trump Voter 2016 --> Biden Voter 2020 (demographics only)")


# Current and long term issues only with controls
trump_sw_cces2c <- glm(trump_switchers ~ contracted_covid + police + scotus_rbg + economy_pocketbook_rescaled + 
                         economy_socio_rescaled + healthcare_rescaled + immigration_rescaled + 
                         political_awareness + party_id + ideology + age + female + race_cat + education + 
                         income, data = cces20, family = binomial)
summary(trump_sw_cces2c)

# Obtain predicted probabilities 
pred_prob_table4_2 <- predict(trump_sw_cces2c, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted11 <- data.frame(cces20$trump_switchers, pred_prob_table4_2, "Trump Voter 2016 --> Biden Voter 2020 (full model)")



#==========================#
# Table: Switchers D---> R #
#==========================#

# demographics only
biden_sw_cces2d <- glm(dem_switchers ~political_awareness + party_id + ideology + age + 
                         female + race_cat + education + income, data = cces20, family = binomial)
summary(biden_sw_cces2d)

# Obtain predicted probabilities 
pred_prob_table5_2 <- predict(biden_sw_cces2d, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted2 <- data.frame(cces20$dem_switchers, pred_prob_table5_2, "Clinton Voter 2016 --> Trump Voter 2020 (demographics only)")



# Current and long term issues only with controls
biden_sw_cces2c <- glm(dem_switchers ~ contracted_covid + police + scotus_rbg + 
                         economy_pocketbook_rescaled + economy_socio_rescaled + 
                         healthcare_rescaled + immigration_rescaled + political_awareness + 
                         party_id + ideology + age + female + race_cat + education + 
                         income, data = cces20, family = binomial)
summary(biden_sw_cces2c)


# Obtain predicted probabilities 
pred_prob_table5_3 <- predict(biden_sw_cces2c, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted22 <- data.frame(cces20$dem_switchers, pred_prob_table5_3, "Clinton Voter 2016 --> Trump Voter 2020 (full model)")



#=============================================#
# Table: Switchers to major parties: O ---> R #
#=============================================#


# demographics only controls
other_trump_sw_cces2d <- glm(other_trump_switchers ~ political_awareness + party_id + 
                               ideology + age +  female + race_cat + education + income, 
                             data = cces20, family = binomial)
summary(other_trump_sw_cces2d)

# Obtain predicted probabilities 
pred_prob_table6_2 <- predict(other_trump_sw_cces2d, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted3 <- data.frame(cces20$other_trump_switchers, pred_prob_table6_2, "Third Party Voter 2016 --> Trump Voter 2020 (demographics only)")



# Current and long term issues only with controls
other_trump_sw_cces2c <- glm(other_trump_switchers ~ contracted_covid + police + scotus_rbg + 
                               economy_pocketbook_rescaled + economy_socio_rescaled + 
                               healthcare_rescaled + immigration_rescaled + political_awareness + 
                               party_id + ideology + age + female + race_cat + education + income, 
                             data = cces20, family = binomial)
summary(other_trump_sw_cces2c)


# Obtain predicted probabilities 
pred_prob_table6_3 <- predict(other_trump_sw_cces2c, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted33 <- data.frame(cces20$other_trump_switchers, pred_prob_table6_3, 
                               "Third Party Voter 2016 --> Trump Voter 2020 (full model)")




#=============================================#
# Table: Switchers to major parties: O ---> D #
#=============================================#


# demographics only
other_biden_sw_cces2d <- glm(other_dem_switchers ~ political_awareness  + party_id + 
                               ideology + age + female + race_cat + education + income, 
                             data = cces20, family = binomial)


# Obtain predicted probabilities 
pred_prob_table7_2 <- predict(other_biden_sw_cces2d, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted4 <- data.frame(cces20$other_dem_switchers, pred_prob_table7_2, 
                              "Third Party Voter 2016 --> Biden Voter 2020 (demographics only)")


# Current and long term issues only with controls
other_biden_sw_cces2c <- glm(other_dem_switchers ~ contracted_covid + police + scotus_rbg + 
                               economy_pocketbook_rescaled + economy_socio_rescaled +  
                               healthcare_rescaled + immigration_rescaled + political_awareness + 
                               party_id + ideology + age + female + race_cat + education + 
                               income, data = cces20, family = binomial)


# Obtain predicted probabilities 
pred_prob_table7_3 <- predict(other_biden_sw_cces2d, newdata = cces20, type = "response")

# Add predicted value to our dataset 
data_predicted44 <- data.frame(cces20$other_dem_switchers, pred_prob_table7_3, 
                               "Third Party Voter 2016 --> Biden Voter 2020 (full model)")




# Prepare data for ggplot 
colnames(data_predicted0) <- c("y", "y_pred", "table")
colnames(data_predicted00) <- c("y", "y_pred", "table")
colnames(data_predicted1) <- c("y", "y_pred", "table")
colnames(data_predicted11) <- c("y", "y_pred", "table")
colnames(data_predicted2) <- c("y", "y_pred", "table")
colnames(data_predicted22) <- c("y", "y_pred", "table")
colnames(data_predicted3) <- c("y", "y_pred", "table")
colnames(data_predicted33) <- c("y", "y_pred", "table")
colnames(data_predicted4) <- c("y", "y_pred", "table")
colnames(data_predicted44) <- c("y", "y_pred", "table")


# Rbind the data
data_predict <- rbind(data_predicted0, data_predicted00, data_predicted1, data_predicted11, 
                      data_predicted2, data_predicted22, data_predicted3, data_predicted33, 
                      data_predicted4, data_predicted44)

# Appendix D: Figure 1
AUR_plot <- ggplot(data_predict, aes(d = y, m = y_pred)) + geom_roc(labels = FALSE, n.cuts = 0) + 
  scale_x_continuous("1 - Specificity", breaks = seq(0, 1, by = .1)) + 
  scale_y_continuous("Sensitivity", breaks = seq(0, 1, by = .1)) + 
  geom_abline(slope = 1, intercept = 0, linetype = 2, color = "black") + 
  facet_wrap(~ table, ncol = 2)  + theme_bw() 

AUR_plot

# save graph in overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote_Switchers/plots/r2_model_fit2.pdf", width = 8, height = 12)

# AUC scores
paste("AUC =", round(calc_auc(AUR_plot)$AUC, 2))
#  "AUC = 0.89" "AUC = 0.97" "AUC = 0.95" "AUC = 0.99" "AUC = 0.7"  "AUC = 0.7"  "AUC = 0.8"  "AUC = 0.89" "AUC = 0.86" "AUC = 0.97"


